library(readxl)
library(xlsx)
library(sjPlot)
library(ggplot2)
library(lme4)
## Loading required package: Matrix
library(stringr)
library(ggExtra)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# linguistic data
# Resolve the directory used to locate the workbook. The working directory is
# the default; the RStudio editor path is used only when the optional
# rstudioapi package is installed, an RStudio session is available and the
# editor reports a non-empty path (otherwise dirname() would yield "" and the
# workbook path would silently resolve to "/data/...").
currdir <- getwd()
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  editor_path <- rstudioapi::getSourceEditorContext()$path
  if (length(editor_path) == 1 && nzchar(editor_path)) {
    currdir <- dirname(editor_path)
  }
}
# The workbook lives two directory levels above `currdir`, under data/.
file_path <- file.path(dirname(dirname(currdir)), 'data/extracted_data_3.xlsx')
data <- read_excel(file_path)
# Handling data
# Remove the excluded locutors, then label every remaining row with its Agent
# ("H" when conv == 1, "R" otherwise).
data <- data[!(data$locutor %in% c(1, 4, 19, 23)), ]
data$Agent <- ifelse(data$conv == 1, "H", "R")
# Adding / renaming columns
data$Trial <- data$conv_id_unif
# Trial label prefixed with "t" and left-padded with "0" to width 2; used as
# the x axis of the per-trial boxplots.
data$Trial2 <- paste0("t", str_pad(data$Trial, 2, pad = "0"))
# extra columns will add themselves automatically - just creating structures
# Zero-filled summary table: one row per "<feature>_part" / "<feature>_conv"
# pair, one column per statistic filled in by the per-feature sections.
df2 <- local({
  features <- c(
    "lexical_richness", "linguistic_complexity", "content_complexity",
    "ratio_silence_lgth", "sum_ipu_lgth", "qt_discourse", "qt_feedback",
    "qt_filled_pause", "ratio_discourse", "nratio_feedback",
    "ratio_filled_pause", "mean_ipu_lgth", "speech_rate_min4"
  )
  # Interleave the two suffixes: f1_part, f1_conv, f2_part, f2_conv, ...
  row_ids <- paste0(rep(features, each = 2), c("_part", "_conv"))
  zeros <- numeric(length(row_ids))
  data.frame(
    mean = zeros,
    std = zeros,
    mean_r = zeros,
    std_r = zeros,
    mean_h = zeros,
    std_h = zeros,
    row.names = row_ids,
    stringsAsFactors = FALSE
  )
})

# lexical_richness ----

# NOTE(review): `..density..` and fun.y / fun.ymin / fun.ymax are deprecated in
# recent ggplot2 releases (after_stat(density); fun / fun.min / fun.max); kept as-is.
# Distribution of lexical_richness per Agent, one facet row per tier:
# density-scaled histogram with a kernel density curve on top.
ggplot(data, aes(x = lexical_richness, color = Agent)) +
  geom_histogram(aes(y = ..density..), fill = "white", alpha = 0.5) +
  geom_density(alpha = 0.2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of lexical_richness for every trial label, by Agent and tier.
ggplot(data, aes(x = Trial2, y = lexical_richness, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Mean of lexical_richness per Agent as bars with mean +/- 1 SD error bars,
# one panel per tier.
ggplot(data, aes(x = Agent, y = lexical_richness, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    fun.ymin = function(v) mean(v) - sd(v),
    fun.ymax = function(v) mean(v) + sd(v),
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "lexical_richness")

# creating merged data - ling
# Put the conversant and participant values of lexical_richness side by side,
# matched on locutor, trial and Agent.
temp1 <- subset(
  data,
  tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "lexical_richness")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "lexical_richness")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Participant value explained by conversant value, Agent, their interaction
# and Trial, with a random intercept and random Trial slope per locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: -1260.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8290 -0.6602 -0.0551  0.6464  6.9743 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.  Corr
##  locutor  (Intercept) 7.326e-04 2.707e-02     
##           Trial       8.066e-09 8.981e-05 1.00
##  Residual             4.210e-03 6.489e-02     
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)       0.150329   0.016123   9.324
## data_conv         0.178702   0.077603   2.303
## AgentR            0.011170   0.018854   0.592
## Trial             0.002416   0.000856   2.822
## data_conv:AgentR -0.054478   0.092948  -0.586
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.847                     
## AgentR      -0.686  0.706              
## Trial       -0.369  0.105  0.188       
## dt_cnv:AgnR  0.741 -0.841 -0.939 -0.185
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Render the fitted model as an sjPlot table (estimates, CIs, p-values and
# random-effect summary) titled with the feature name.
tab_model(mdl, title = paste("part ~ conv ", 'lexical_richness'))
## part ~ conv lexical_richness
##   data_part
## Predictors Estimates CI p
## (Intercept) 0.15 0.12 – 0.18 <0.001
## data_conv 0.18 0.03 – 0.33 0.021
## Agent [R] 0.01 -0.03 – 0.05 0.554
## Trial 0.00 0.00 – 0.00 0.005
## data_conv * Agent [R] -0.05 -0.24 – 0.13 0.558
## Random Effects
## σ2 0.00
## τ00 locutor 0.00
## τ11 locutor.Trial 0.00
## ρ01 locutor 1.00
## N locutor 21
## Observations 504
## Marginal R2 / Conditional R2 0.048 / NA
# saving data
# Fixed-effect coefficient table of `mdl`, tagged with the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "lexical_richness"
# Request profile intervals for the fixed effects only (parm = "beta_") rather
# than profiling every parameter and slicing rows 5:9 by position: the
# positional slice picks the wrong rows as soon as the number of random- or
# fixed-effect terms changes. Rows are matched to `s` by term name.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
l <- l[rownames(s), , drop = FALSE]
## Computing profile confidence intervals ...
# First feature: this creates the accumulator later sections append to.
df_overall <- cbind(s, l)

# saving other features
# Mean and SD of the participant and conversant values, overall and separately
# for Agent == "R" and Agent == "H", written into the matching rows of df2.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  value_col <- paste0("data_", pc)
  stat_row <- paste0("lexical_richness_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Base canvas for the scatterplot below: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Scatter of the paired values with a linear fit per Agent colour group.
basic_plot +
  geom_point(size = 0.9, alpha = 0.3) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: lexical_richness Conv",
    y = "VD: lexical_richness Part",
    color = "Agent"
  )

# Same pairs with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds marginal density curves grouped by colour.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: lexical_richness Conv",
    y = "VD: lexical_richness Part",
    color = "Agent"
  ) +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

# linguistic_complexity ----

# NOTE(review): `..density..` and fun.y / fun.ymin / fun.ymax are deprecated in
# recent ggplot2 releases (after_stat(density); fun / fun.min / fun.max); kept as-is.
# Distribution of linguistic_complexity per Agent, one facet row per tier:
# density-scaled histogram with a kernel density curve on top.
ggplot(data, aes(x = linguistic_complexity, color = Agent)) +
  geom_histogram(aes(y = ..density..), fill = "white", alpha = 0.5) +
  geom_density(alpha = 0.2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of linguistic_complexity for every trial label, by Agent and tier.
ggplot(data, aes(x = Trial2, y = linguistic_complexity, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Mean of linguistic_complexity per Agent as bars with mean +/- 1 SD error
# bars, one panel per tier.
ggplot(data, aes(x = Agent, y = linguistic_complexity, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    fun.ymin = function(v) mean(v) - sd(v),
    fun.ymax = function(v) mean(v) + sd(v),
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "linguistic_complexity")

# creating merged data - ling
# Put the conversant and participant values of linguistic_complexity side by
# side, matched on locutor, trial and Agent.
temp1 <- subset(
  data,
  tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "linguistic_complexity")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "linguistic_complexity")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Participant value explained by conversant value, Agent, their interaction
# and Trial, with a random intercept and random Trial slope per locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: -1388.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3545 -0.6236 -0.0275  0.5946  4.1871 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr 
##  locutor  (Intercept) 6.400e-04 0.025298      
##           Trial       1.136e-06 0.001066 -0.59
##  Residual             3.267e-03 0.057159      
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)       0.2680245  0.0260919  10.272
## data_conv         0.1638249  0.0723663   2.264
## AgentR            0.0565537  0.0303653   1.862
## Trial             0.0010692  0.0007787   1.373
## data_conv:AgentR -0.1415131  0.0908802  -1.557
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.955                     
## AgentR      -0.812  0.833              
## Trial       -0.072 -0.117 -0.097       
## dt_cnv:AgnR  0.756 -0.791 -0.982  0.092
# Render the fitted model as an sjPlot table (estimates, CIs, p-values and
# random-effect summary) titled with the feature name.
tab_model(mdl, title = paste("part ~ conv ", 'linguistic_complexity'))
## part ~ conv linguistic_complexity
##   data_part
## Predictors Estimates CI p
## (Intercept) 0.27 0.22 – 0.32 <0.001
## data_conv 0.16 0.02 – 0.31 0.024
## Agent [R] 0.06 -0.00 – 0.12 0.063
## Trial 0.00 -0.00 – 0.00 0.170
## data_conv * Agent [R] -0.14 -0.32 – 0.04 0.119
## Random Effects
## σ2 0.00
## τ00 locutor 0.00
## τ11 locutor.Trial 0.00
## ρ01 locutor -0.59
## ICC 0.14
## N locutor 21
## Observations 504
## Marginal R2 / Conditional R2 0.016 / 0.150
# saving data
# Fixed-effect coefficient table of `mdl`, tagged with the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "linguistic_complexity"
# Request profile intervals for the fixed effects only (parm = "beta_") rather
# than profiling every parameter and slicing rows 5:9 by position: the
# positional slice picks the wrong rows as soon as the number of random- or
# fixed-effect terms changes. Rows are matched to `s` by term name.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
l <- l[rownames(s), , drop = FALSE]
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and SD of the participant and conversant values, overall and separately
# for Agent == "R" and Agent == "H", written into the matching rows of df2.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  value_col <- paste0("data_", pc)
  stat_row <- paste0("linguistic_complexity_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Base canvas for the scatterplot below: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Scatter of the paired values with a linear fit per Agent colour group.
basic_plot +
  geom_point(size = 0.9, alpha = 0.3) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: linguistic_complexity Conv",
    y = "VD: linguistic_complexity Part",
    color = "Agent"
  )

# Same pairs with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds marginal density curves grouped by colour.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: linguistic_complexity Conv",
    y = "VD: linguistic_complexity Part",
    color = "Agent"
  ) +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

# content_complexity ----

# NOTE(review): `..density..` and fun.y / fun.ymin / fun.ymax are deprecated in
# recent ggplot2 releases (after_stat(density); fun / fun.min / fun.max); kept as-is.
# Distribution of content_complexity per Agent, one facet row per tier:
# density-scaled histogram with a kernel density curve on top.
ggplot(data, aes(x = content_complexity, color = Agent)) +
  geom_histogram(aes(y = ..density..), fill = "white", alpha = 0.5) +
  geom_density(alpha = 0.2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of content_complexity for every trial label, by Agent and tier.
ggplot(data, aes(x = Trial2, y = content_complexity, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Mean of content_complexity per Agent as bars with mean +/- 1 SD error bars,
# one panel per tier.
ggplot(data, aes(x = Agent, y = content_complexity, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    fun.ymin = function(v) mean(v) - sd(v),
    fun.ymax = function(v) mean(v) + sd(v),
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "content_complexity")

# creating merged data - ling
# Put the conversant and participant values of content_complexity side by
# side, matched on locutor, trial and Agent.
temp1 <- subset(
  data,
  tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "content_complexity")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "content_complexity")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Participant value explained by conversant value, Agent, their interaction
# and Trial, with a random intercept and random Trial slope per locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: -1620.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1014 -0.6520 -0.0157  0.5953  3.2123 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.  Corr 
##  locutor  (Intercept) 1.607e-04 0.0126759      
##           Trial       1.061e-07 0.0003257 -1.00
##  Residual             2.118e-03 0.0460196      
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)       0.2300966  0.0174530  13.184
## data_conv         0.1075481  0.0751409   1.431
## AgentR            0.0289939  0.0228220   1.270
## Trial            -0.0006037  0.0006002  -1.006
## data_conv:AgentR -0.0966974  0.0936795  -1.032
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.955                     
## AgentR      -0.710  0.732              
## Trial       -0.191 -0.014 -0.065       
## dt_cnv:AgnR  0.757 -0.802 -0.980  0.061
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Render the fitted model as an sjPlot table (estimates, CIs, p-values and
# random-effect summary) titled with the feature name.
tab_model(mdl, title = paste("part ~ conv ", 'content_complexity'))
## part ~ conv content_complexity
##   data_part
## Predictors Estimates CI p
## (Intercept) 0.23 0.20 – 0.26 <0.001
## data_conv 0.11 -0.04 – 0.25 0.152
## Agent [R] 0.03 -0.02 – 0.07 0.204
## Trial -0.00 -0.00 – 0.00 0.314
## data_conv * Agent [R] -0.10 -0.28 – 0.09 0.302
## Random Effects
## σ2 0.00
## τ00 locutor 0.00
## τ11 locutor.Trial 0.00
## ρ01 locutor -1.00
## N locutor 21
## Observations 504
## Marginal R2 / Conditional R2 0.014 / NA
# saving data
# Fixed-effect coefficient table of `mdl`, tagged with the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "content_complexity"
# Request profile intervals for the fixed effects only (parm = "beta_") rather
# than profiling every parameter and slicing rows 5:9 by position: the
# positional slice picks the wrong rows as soon as the number of random- or
# fixed-effect terms changes. Rows are matched to `s` by term name.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
l <- l[rownames(s), , drop = FALSE]
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and SD of the participant and conversant values, overall and separately
# for Agent == "R" and Agent == "H", written into the matching rows of df2.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  value_col <- paste0("data_", pc)
  stat_row <- paste0("content_complexity_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Base canvas for the scatterplot below: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Scatter of the paired values with a linear fit per Agent colour group.
basic_plot +
  geom_point(size = 0.9, alpha = 0.3) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: content_complexity Conv",
    y = "VD: content_complexity Part",
    color = "Agent"
  )

# Same pairs with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds marginal density curves grouped by colour.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: content_complexity Conv",
    y = "VD: content_complexity Part",
    color = "Agent"
  ) +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

# ratio_silence_lgth ----

# NOTE(review): `..density..` and fun.y / fun.ymin / fun.ymax are deprecated in
# recent ggplot2 releases (after_stat(density); fun / fun.min / fun.max); kept as-is.
# Distribution of ratio_silence_lgth per Agent, one facet row per tier:
# density-scaled histogram with a kernel density curve on top.
ggplot(data, aes(x = ratio_silence_lgth, color = Agent)) +
  geom_histogram(aes(y = ..density..), fill = "white", alpha = 0.5) +
  geom_density(alpha = 0.2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of ratio_silence_lgth for every trial label, by Agent and tier.
ggplot(data, aes(x = Trial2, y = ratio_silence_lgth, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Mean of ratio_silence_lgth per Agent as bars with mean +/- 1 SD error bars,
# one panel per tier.
ggplot(data, aes(x = Agent, y = ratio_silence_lgth, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    fun.ymin = function(v) mean(v) - sd(v),
    fun.ymax = function(v) mean(v) + sd(v),
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "ratio_silence_lgth")

# creating merged data - ling
# Put the conversant and participant values of ratio_silence_lgth side by
# side, matched on locutor, trial and Agent.
temp1 <- subset(
  data,
  tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "ratio_silence_lgth")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "ratio_silence_lgth")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Participant value explained by conversant value, Agent, their interaction
# and Trial, with a random intercept and random Trial slope per locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: -1307
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2753 -0.6165 -0.0025  0.6903  2.8192 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr 
##  locutor  (Intercept) 1.047e-02 0.102319      
##           Trial       9.878e-06 0.003143 -0.53
##  Residual             3.395e-03 0.058267      
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)       1.017570   0.033642  30.247
## data_conv        -0.533077   0.040433 -13.184
## AgentR            0.364294   0.060372   6.034
## Trial            -0.005688   0.001022  -5.567
## data_conv:AgentR -0.275514   0.075843  -3.633
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.730                     
## AgentR      -0.176  0.215              
## Trial       -0.299 -0.039  0.069       
## dt_cnv:AgnR  0.264 -0.344 -0.987 -0.061
# Render the fitted model as an sjPlot table (estimates, CIs, p-values and
# random-effect summary) titled with the feature name.
tab_model(mdl, title = paste("part ~ conv ", 'ratio_silence_lgth'))
## part ~ conv ratio_silence_lgth
##   data_part
## Predictors Estimates CI p
## (Intercept) 1.02 0.95 – 1.08 <0.001
## data_conv -0.53 -0.61 – -0.45 <0.001
## Agent [R] 0.36 0.25 – 0.48 <0.001
## Trial -0.01 -0.01 – -0.00 <0.001
## data_conv * Agent [R] -0.28 -0.42 – -0.13 <0.001
## Random Effects
## σ2 0.00
## τ00 locutor 0.01
## τ11 locutor.Trial 0.00
## ρ01 locutor -0.53
## ICC 0.73
## N locutor 21
## Observations 504
## Marginal R2 / Conditional R2 0.229 / 0.789
# saving data
# Fixed-effect coefficient table of `mdl`, tagged with the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "ratio_silence_lgth"
# Request profile intervals for the fixed effects only (parm = "beta_") rather
# than profiling every parameter and slicing rows 5:9 by position: the
# positional slice picks the wrong rows as soon as the number of random- or
# fixed-effect terms changes. Rows are matched to `s` by term name.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
l <- l[rownames(s), , drop = FALSE]
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and SD of the participant and conversant values, overall and separately
# for Agent == "R" and Agent == "H", written into the matching rows of df2.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  value_col <- paste0("data_", pc)
  stat_row <- paste0("ratio_silence_lgth_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Base canvas for the scatterplot below: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Scatter of the paired values with a linear fit per Agent colour group.
basic_plot +
  geom_point(size = 0.9, alpha = 0.3) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: ratio_silence_lgth Conv",
    y = "VD: ratio_silence_lgth Part",
    color = "Agent"
  )

# Same pairs with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds marginal density curves grouped by colour.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: ratio_silence_lgth Conv",
    y = "VD: ratio_silence_lgth Part",
    color = "Agent"
  ) +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

# sum_ipu_lgth ----

# NOTE(review): `..density..` and fun.y / fun.ymin / fun.ymax are deprecated in
# recent ggplot2 releases (after_stat(density); fun / fun.min / fun.max); kept as-is.
# Distribution of sum_ipu_lgth per Agent, one facet row per tier:
# density-scaled histogram with a kernel density curve on top.
ggplot(data, aes(x = sum_ipu_lgth, color = Agent)) +
  geom_histogram(aes(y = ..density..), fill = "white", alpha = 0.5) +
  geom_density(alpha = 0.2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of sum_ipu_lgth for every trial label, by Agent and tier.
ggplot(data, aes(x = Trial2, y = sum_ipu_lgth, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Mean of sum_ipu_lgth per Agent as bars with mean +/- 1 SD error bars,
# one panel per tier.
ggplot(data, aes(x = Agent, y = sum_ipu_lgth, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    fun.ymin = function(v) mean(v) - sd(v),
    fun.ymax = function(v) mean(v) + sd(v),
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "sum_ipu_lgth")

# creating merged data - ling
# Put the conversant and participant values of sum_ipu_lgth side by side,
# matched on locutor, trial and Agent.
temp1 <- subset(
  data,
  tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "sum_ipu_lgth")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "sum_ipu_lgth")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Participant value explained by conversant value, Agent, their interaction
# and Trial, with a random intercept and random Trial slope per locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: 2778.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8192 -0.6903  0.0025  0.6165  3.2753 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  locutor  (Intercept) 36.44323 6.0368        
##           Trial        0.03439 0.1854   -0.53
##  Residual             11.81799 3.4377        
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)      30.41488    1.64959  18.438
## data_conv        -0.53308    0.04043 -13.184
## AgentR           -5.23801    1.11255  -4.708
## Trial             0.33559    0.06028   5.567
## data_conv:AgentR -0.27551    0.07584  -3.633
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.568                     
## AgentR      -0.411  0.698              
## Trial       -0.417  0.039 -0.025       
## dt_cnv:AgnR  0.181 -0.344 -0.861  0.061
# Render the fitted model as an sjPlot table (estimates, CIs, p-values and
# random-effect summary) titled with the feature name.
tab_model(mdl, title = paste("part ~ conv ", 'sum_ipu_lgth'))
## part ~ conv sum_ipu_lgth
##   data_part
## Predictors Estimates CI p
## (Intercept) 30.41 27.18 – 33.65 <0.001
## data_conv -0.53 -0.61 – -0.45 <0.001
## Agent [R] -5.24 -7.42 – -3.06 <0.001
## Trial 0.34 0.22 – 0.45 <0.001
## data_conv * Agent [R] -0.28 -0.42 – -0.13 <0.001
## Random Effects
## σ2 11.82
## τ00 locutor 36.44
## τ11 locutor.Trial 0.03
## ρ01 locutor -0.53
## ICC 0.73
## N locutor 21
## Observations 504
## Marginal R2 / Conditional R2 0.229 / 0.789
# saving data
# Fixed-effect coefficient table of `mdl`, tagged with the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "sum_ipu_lgth"
# Request profile intervals for the fixed effects only (parm = "beta_") rather
# than profiling every parameter and slicing rows 5:9 by position: the
# positional slice picks the wrong rows as soon as the number of random- or
# fixed-effect terms changes. Rows are matched to `s` by term name.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
l <- l[rownames(s), , drop = FALSE]
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and SD of the participant and conversant values, overall and separately
# for Agent == "R" and Agent == "H", written into the matching rows of df2.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  value_col <- paste0("data_", pc)
  stat_row <- paste0("sum_ipu_lgth_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Base canvas for the scatterplot below: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Scatter of the paired values with a linear fit per Agent colour group.
basic_plot +
  geom_point(size = 0.9, alpha = 0.3) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: sum_ipu_lgth Conv",
    y = "VD: sum_ipu_lgth Part",
    color = "Agent"
  )

# Same pairs with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds marginal density curves grouped by colour.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: sum_ipu_lgth Conv",
    y = "VD: sum_ipu_lgth Part",
    color = "Agent"
  ) +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

# qt_discourse ----

# NOTE(review): `..density..` and fun.y / fun.ymin / fun.ymax are deprecated in
# recent ggplot2 releases (after_stat(density); fun / fun.min / fun.max); kept as-is.
# Distribution of qt_discourse per Agent, one facet row per tier:
# density-scaled histogram with a kernel density curve on top.
ggplot(data, aes(x = qt_discourse, color = Agent)) +
  geom_histogram(aes(y = ..density..), fill = "white", alpha = 0.5) +
  geom_density(alpha = 0.2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of qt_discourse for every trial label, by Agent and tier.
ggplot(data, aes(x = Trial2, y = qt_discourse, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Mean of qt_discourse per Agent as bars with mean +/- 1 SD error bars,
# one panel per tier.
ggplot(data, aes(x = Agent, y = qt_discourse, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    fun.ymin = function(v) mean(v) - sd(v),
    fun.ymax = function(v) mean(v) + sd(v),
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "qt_discourse")

# creating merged data - ling
# Put the conversant and participant values of qt_discourse side by side,
# matched on locutor, trial and Agent.
temp1 <- subset(
  data,
  tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "qt_discourse")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "qt_discourse")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Participant value explained by conversant value, Agent, their interaction
# and Trial, with a random intercept and random Trial slope per locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: 2212.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1510 -0.6998 -0.1189  0.6175  3.7284 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  locutor  (Intercept) 2.13485  1.4611        
##           Trial       0.01509  0.1228   -0.45
##  Residual             4.07850  2.0195        
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)       3.36665    0.42192   7.979
## data_conv        -0.12356    0.05071  -2.437
## AgentR           -0.48220    0.28817  -1.673
## Trial             0.12076    0.03759   3.213
## data_conv:AgentR -0.04365    0.15102  -0.289
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.469                     
## AgentR      -0.449  0.706              
## Trial       -0.466 -0.041 -0.061       
## dt_cnv:AgnR  0.099 -0.306 -0.534  0.104
# Render the fitted model as an sjPlot table (estimates, CIs, p-values and
# random-effect summary) titled with the feature name.
tab_model(mdl, title = paste("part ~ conv ", 'qt_discourse'))
## part ~ conv qt_discourse
##   data_part
## Predictors Estimates CI p
## (Intercept) 3.37 2.54 – 4.19 <0.001
## data_conv -0.12 -0.22 – -0.02 0.015
## Agent [R] -0.48 -1.05 – 0.08 0.094
## Trial 0.12 0.05 – 0.19 0.001
## data_conv * Agent [R] -0.04 -0.34 – 0.25 0.773
## Random Effects
## σ2 4.08
## τ00 locutor 2.13
## τ11 locutor.Trial 0.02
## ρ01 locutor -0.45
## ICC 0.31
## N locutor 21
## Observations 504
## Marginal R2 / Conditional R2 0.040 / 0.342
# saving data
# Fixed-effect coefficient table of `mdl`, tagged with the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "qt_discourse"
# Request profile intervals for the fixed effects only (parm = "beta_") rather
# than profiling every parameter and slicing rows 5:9 by position: the
# positional slice picks the wrong rows as soon as the number of random- or
# fixed-effect terms changes. Rows are matched to `s` by term name.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
l <- l[rownames(s), , drop = FALSE]
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and SD of the participant and conversant values, overall and separately
# for Agent == "R" and Agent == "H", written into the matching rows of df2.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  value_col <- paste0("data_", pc)
  stat_row <- paste0("qt_discourse_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Base canvas for the scatterplot below: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Scatter of the paired values with a linear fit per Agent colour group.
basic_plot +
  geom_point(size = 0.9, alpha = 0.3) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: qt_discourse Conv",
    y = "VD: qt_discourse Part",
    color = "Agent"
  )

# Same pairs with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds marginal density curves grouped by colour.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: qt_discourse Conv",
    y = "VD: qt_discourse Part",
    color = "Agent"
  ) +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

# qt_feedback ----

# NOTE(review): `..density..` and fun.y / fun.ymin / fun.ymax are deprecated in
# recent ggplot2 releases (after_stat(density); fun / fun.min / fun.max); kept as-is.
# Distribution of qt_feedback per Agent, one facet row per tier:
# density-scaled histogram with a kernel density curve on top.
ggplot(data, aes(x = qt_feedback, color = Agent)) +
  geom_histogram(aes(y = ..density..), fill = "white", alpha = 0.5) +
  geom_density(alpha = 0.2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of qt_feedback for every trial label, by Agent and tier.
ggplot(data, aes(x = Trial2, y = qt_feedback, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Mean of qt_feedback per Agent as bars with mean +/- 1 SD error bars,
# one panel per tier.
ggplot(data, aes(x = Agent, y = qt_feedback, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    fun.ymin = function(v) mean(v) - sd(v),
    fun.ymax = function(v) mean(v) + sd(v),
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "qt_feedback")

# creating merged data - ling
# Put the conversant and participant values of qt_feedback side by side,
# matched on locutor, trial and Agent.
temp1 <- subset(
  data,
  tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "qt_feedback")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "qt_feedback")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Participant value explained by conversant value, Agent, their interaction
# and Trial, with a random intercept and random Trial slope per locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: 2452.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5178 -0.6009 -0.0654  0.5377  5.5439 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr 
##  locutor  (Intercept) 4.1391928 2.03450       
##           Trial       0.0007844 0.02801  -1.00
##  Residual             6.7044221 2.58929       
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)       6.51675    0.66072   9.863
## data_conv         0.06761    0.04592   1.472
## AgentR           -1.92219    0.53546  -3.590
## Trial            -0.09987    0.03425  -2.916
## data_conv:AgentR -0.06397    0.10826  -0.591
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.640                     
## AgentR      -0.542  0.736              
## Trial       -0.472  0.126  0.077       
## dt_cnv:AgnR  0.252 -0.406 -0.776 -0.024
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Model summary table, titled with the feature name
tab_model(mdl, title = paste("part ~ conv ", "qt_feedback", sep = " "))
part ~ conv qt_feedback
  data_part
Predictors Estimates CI p
(Intercept) 6.52 5.22 – 7.81 <0.001
data_conv 0.07 -0.02 – 0.16 0.141
Agent [R] -1.92 -2.97 – -0.87 <0.001
Trial -0.10 -0.17 – -0.03 0.004
data_conv * Agent [R] -0.06 -0.28 – 0.15 0.555
Random Effects
σ2 6.70
τ00 locutor 4.14
τ11 locutor.Trial 0.00
ρ01 locutor -1.00
N locutor 21
Observations 504
Marginal R2 / Conditional R2 0.205 / NA
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the
# feature name so its rows stay identifiable once appended to df_overall
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "qt_feedback"
# Confidence intervals are requested for the fixed effects only
# (parm = "beta_") and aligned with `s` by coefficient name; this replaces
# the positional rows 5:9, which are only right for this exact model layout
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
l <- l[rownames(s), , drop = FALSE]
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and sd of the participant and conversant values: overall, and
# separately for Agent == "R" and Agent == "H"
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  target_row <- paste0("qt_feedback_", pc)
  value_col <- paste0("data_", pc)
  df2[target_row, "mean"] <- mean(merres[[value_col]])
  df2[target_row, "std"] <- sd(merres[[value_col]])
  df2[target_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[target_row, "std_r"] <- sd(data_r[[value_col]])
  df2[target_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[target_row, "std_h"] <- sd(data_h[[value_col]])
}
# Setting up the building blocks
# Base layer: data_conv on x, data_part on y, coloured by Agent
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Colored scatterplot and regression lines
basic_plot +
  geom_point(alpha = .3, size = .9) +
  geom_smooth(method = "lm") +
  labs(x = "VI: qt_feedback Conv", y = "VD: qt_feedback Part", color = "Agent")

# second plot
# Scatter with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds per-Agent marginal densities
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(x = "VI: qt_feedback Conv", y = "VD: qt_feedback Part", color = "Agent") +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

qt_filled_pause

# Density-scaled histogram with overlaid density curve of qt_filled_pause,
# coloured by Agent, one row of panels per tier
ggplot(data, aes(x = qt_filled_pause, color = Agent)) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of qt_filled_pause per trial, coloured by Agent, split by tier
ggplot(data, aes(x = Trial2, y = qt_filled_pause, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Bars show the mean per Agent; error bars span mean - sd to mean + sd
ggplot(data, aes(x = Agent, y = qt_filled_pause, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    width = 0.25,
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x)
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "qt_filled_pause")

# creating merged data - ling
# Conversant and participant rows are extracted separately, then joined on
# locutor / Trial / Agent so each row holds data_conv and data_part
temp1 <- setNames(
  subset(data,
         subset = tier == "conversant",
         select = c("locutor", "conv_id_unif", "Agent", "qt_filled_pause")),
  c("locutor", "Trial", "Agent", "data_conv")
)
temp2 <- setNames(
  subset(data,
         subset = tier == "participant",
         select = c("locutor", "conv_id_unif", "Agent", "qt_filled_pause")),
  c("locutor", "Trial", "Agent", "data_part")
)
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Fixed effects: data_conv, Agent, their interaction and Trial;
# random intercept and Trial slope per locutor
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: 2016.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0423 -0.5926 -0.1137  0.5403  3.4174 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  locutor  (Intercept) 3.0599   1.74925      
##           Trial       0.0076   0.08718  0.04
##  Residual             2.6686   1.63357      
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)       2.73863    0.47950   5.711
## data_conv        -0.07779    0.04332  -1.796
## AgentR           -0.08165    0.27987  -0.292
## Trial             0.03905    0.02854   1.369
## data_conv:AgentR -0.12397    0.24630  -0.503
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.511                     
## AgentR      -0.511  0.838              
## Trial       -0.191  0.065  0.067       
## dt_cnv:AgnR  0.111 -0.170 -0.305 -0.085
# Model summary table, titled with the feature name
tab_model(mdl, title = paste("part ~ conv ", "qt_filled_pause", sep = " "))
part ~ conv qt_filled_pause
  data_part
Predictors Estimates CI p
(Intercept) 2.74 1.80 – 3.68 <0.001
data_conv -0.08 -0.16 – 0.01 0.073
Agent [R] -0.08 -0.63 – 0.47 0.770
Trial 0.04 -0.02 – 0.09 0.171
data_conv * Agent [R] -0.12 -0.61 – 0.36 0.615
Random Effects
σ2 2.67
τ00 locutor 3.06
τ11 locutor.Trial 0.01
ρ01 locutor 0.04
ICC 0.56
N locutor 21
Observations 504
Marginal R2 / Conditional R2 0.011 / 0.569
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the
# feature name so its rows stay identifiable once appended to df_overall
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "qt_filled_pause"
# Confidence intervals are requested for the fixed effects only
# (parm = "beta_") and aligned with `s` by coefficient name; this replaces
# the positional rows 5:9, which are only right for this exact model layout
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
l <- l[rownames(s), , drop = FALSE]
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and sd of the participant and conversant values: overall, and
# separately for Agent == "R" and Agent == "H"
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  target_row <- paste0("qt_filled_pause_", pc)
  value_col <- paste0("data_", pc)
  df2[target_row, "mean"] <- mean(merres[[value_col]])
  df2[target_row, "std"] <- sd(merres[[value_col]])
  df2[target_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[target_row, "std_r"] <- sd(data_r[[value_col]])
  df2[target_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[target_row, "std_h"] <- sd(data_h[[value_col]])
}
# Setting up the building blocks
# Base layer: data_conv on x, data_part on y, coloured by Agent
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Colored scatterplot and regression lines
basic_plot +
  geom_point(alpha = .3, size = .9) +
  geom_smooth(method = "lm") +
  labs(x = "VI: qt_filled_pause Conv", y = "VD: qt_filled_pause Part", color = "Agent")

# second plot
# Scatter with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds per-Agent marginal densities
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(x = "VI: qt_filled_pause Conv", y = "VD: qt_filled_pause Part", color = "Agent") +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)
## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive

## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive

ratio_discourse

# Density-scaled histogram with overlaid density curve of ratio_discourse,
# coloured by Agent, one row of panels per tier
ggplot(data, aes(x = ratio_discourse, color = Agent)) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of ratio_discourse per trial, coloured by Agent, split by tier
ggplot(data, aes(x = Trial2, y = ratio_discourse, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Bars show the mean per Agent; error bars span mean - sd to mean + sd
ggplot(data, aes(x = Agent, y = ratio_discourse, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    width = 0.25,
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x)
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "ratio_discourse")

# creating merged data - ling
# Conversant and participant rows are extracted separately, then joined on
# locutor / Trial / Agent so each row holds data_conv and data_part
temp1 <- setNames(
  subset(data,
         subset = tier == "conversant",
         select = c("locutor", "conv_id_unif", "Agent", "ratio_discourse")),
  c("locutor", "Trial", "Agent", "data_conv")
)
temp2 <- setNames(
  subset(data,
         subset = tier == "participant",
         select = c("locutor", "conv_id_unif", "Agent", "ratio_discourse")),
  c("locutor", "Trial", "Agent", "data_part")
)
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Fixed effects: data_conv, Agent, their interaction and Trial;
# random intercept and Trial slope per locutor
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: -2179.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0186 -0.7284 -0.0450  0.5498  4.4077 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.  Corr 
##  locutor  (Intercept) 9.029e-05 0.0095019      
##           Trial       6.985e-07 0.0008357 -0.53
##  Residual             6.793e-04 0.0260635      
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)       0.0402339  0.0043781   9.190
## data_conv        -0.0349661  0.0712201  -0.491
## AgentR            0.0026618  0.0040547   0.656
## Trial             0.0007880  0.0003843   2.050
## data_conv:AgentR  0.0382356  0.1070018   0.357
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.675                     
## AgentR      -0.650  0.757              
## Trial       -0.456 -0.053 -0.065       
## dt_cnv:AgnR  0.422 -0.667 -0.740  0.095
# Model summary table, titled with the feature name
tab_model(mdl, title = paste("part ~ conv ", "ratio_discourse", sep = " "))
part ~ conv ratio_discourse
  data_part
Predictors Estimates CI p
(Intercept) 0.04 0.03 – 0.05 <0.001
data_conv -0.03 -0.17 – 0.10 0.623
Agent [R] 0.00 -0.01 – 0.01 0.512
Trial 0.00 0.00 – 0.00 0.040
data_conv * Agent [R] 0.04 -0.17 – 0.25 0.721
Random Effects
σ2 0.00
τ00 locutor 0.00
τ11 locutor.Trial 0.00
ρ01 locutor -0.53
ICC 0.10
N locutor 21
Observations 504
Marginal R2 / Conditional R2 0.016 / 0.112
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the
# feature name so its rows stay identifiable once appended to df_overall
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "ratio_discourse"
# Confidence intervals are requested for the fixed effects only
# (parm = "beta_") and aligned with `s` by coefficient name; this replaces
# the positional rows 5:9, which are only right for this exact model layout
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
l <- l[rownames(s), , drop = FALSE]
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and sd of the participant and conversant values: overall, and
# separately for Agent == "R" and Agent == "H"
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  target_row <- paste0("ratio_discourse_", pc)
  value_col <- paste0("data_", pc)
  df2[target_row, "mean"] <- mean(merres[[value_col]])
  df2[target_row, "std"] <- sd(merres[[value_col]])
  df2[target_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[target_row, "std_r"] <- sd(data_r[[value_col]])
  df2[target_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[target_row, "std_h"] <- sd(data_h[[value_col]])
}
# Setting up the building blocks
# Base layer: data_conv on x, data_part on y, coloured by Agent
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Colored scatterplot and regression lines
basic_plot +
  geom_point(alpha = .3, size = .9) +
  geom_smooth(method = "lm") +
  labs(x = "VI: ratio_discourse Conv", y = "VD: ratio_discourse Part", color = "Agent")

# second plot
# Scatter with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds per-Agent marginal densities
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(x = "VI: ratio_discourse Conv", y = "VD: ratio_discourse Part", color = "Agent") +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

nratio_feedback

# Density-scaled histogram with overlaid density curve of nratio_feedback,
# coloured by Agent, one row of panels per tier
ggplot(data, aes(x = nratio_feedback, color = Agent)) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of nratio_feedback per trial, coloured by Agent, split by tier
ggplot(data, aes(x = Trial2, y = nratio_feedback, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Bars show the mean per Agent; error bars span mean - sd to mean + sd
ggplot(data, aes(x = Agent, y = nratio_feedback, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    width = 0.25,
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x)
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "nratio_feedback")

# creating merged data - ling
# Conversant and participant rows are extracted separately, then joined on
# locutor / Trial / Agent so each row holds data_conv and data_part
temp1 <- setNames(
  subset(data,
         subset = tier == "conversant",
         select = c("locutor", "conv_id_unif", "Agent", "nratio_feedback")),
  c("locutor", "Trial", "Agent", "data_conv")
)
temp2 <- setNames(
  subset(data,
         subset = tier == "participant",
         select = c("locutor", "conv_id_unif", "Agent", "nratio_feedback")),
  c("locutor", "Trial", "Agent", "data_part")
)
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Fixed effects: data_conv, Agent, their interaction and Trial;
# random intercept and Trial slope per locutor
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: -516.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6981 -0.6881 -0.0724  0.6373  3.7800 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr 
##  locutor  (Intercept) 5.539e-03 0.074425      
##           Trial       2.052e-06 0.001432 -1.00
##  Residual             1.838e-02 0.135570      
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)       0.420656   0.030614  13.741
## data_conv        -0.109171   0.059408  -1.838
## AgentR           -0.141563   0.030281  -4.675
## Trial            -0.005980   0.001782  -3.355
## data_conv:AgentR -0.001098   0.086784  -0.013
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.736                     
## AgentR      -0.638  0.750              
## Trial       -0.355 -0.063 -0.025       
## dt_cnv:AgnR  0.499 -0.664 -0.892  0.009
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Model summary table, titled with the feature name
tab_model(mdl, title = paste("part ~ conv ", "nratio_feedback", sep = " "))
part ~ conv nratio_feedback
  data_part
Predictors Estimates CI p
(Intercept) 0.42 0.36 – 0.48 <0.001
data_conv -0.11 -0.23 – 0.01 0.066
Agent [R] -0.14 -0.20 – -0.08 <0.001
Trial -0.01 -0.01 – -0.00 0.001
data_conv * Agent [R] -0.00 -0.17 – 0.17 0.990
Random Effects
σ2 0.02
τ00 locutor 0.01
τ11 locutor.Trial 0.00
ρ01 locutor -1.00
ICC 0.20
N locutor 21
Observations 504
Marginal R2 / Conditional R2 0.172 / 0.333
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the
# feature name so its rows stay identifiable once appended to df_overall
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "nratio_feedback"
# Confidence intervals are requested for the fixed effects only
# (parm = "beta_") and aligned with `s` by coefficient name; this replaces
# the positional rows 5:9, which are only right for this exact model layout
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
l <- l[rownames(s), , drop = FALSE]
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and sd of the participant and conversant values: overall, and
# separately for Agent == "R" and Agent == "H"
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  target_row <- paste0("nratio_feedback_", pc)
  value_col <- paste0("data_", pc)
  df2[target_row, "mean"] <- mean(merres[[value_col]])
  df2[target_row, "std"] <- sd(merres[[value_col]])
  df2[target_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[target_row, "std_r"] <- sd(data_r[[value_col]])
  df2[target_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[target_row, "std_h"] <- sd(data_h[[value_col]])
}
# Setting up the building blocks
# Base layer: data_conv on x, data_part on y, coloured by Agent
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Colored scatterplot and regression lines
basic_plot +
  geom_point(alpha = .3, size = .9) +
  geom_smooth(method = "lm") +
  labs(x = "VI: nratio_feedback Conv", y = "VD: nratio_feedback Part", color = "Agent")

# second plot
# Scatter with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds per-Agent marginal densities
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(x = "VI: nratio_feedback Conv", y = "VD: nratio_feedback Part", color = "Agent") +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

ratio_filled_pause

# Density-scaled histogram with overlaid density curve of ratio_filled_pause,
# coloured by Agent, one row of panels per tier
ggplot(data, aes(x = ratio_filled_pause, color = Agent)) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of ratio_filled_pause per trial, coloured by Agent, split by tier
ggplot(data, aes(x = Trial2, y = ratio_filled_pause, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Bars show the mean per Agent; error bars span mean - sd to mean + sd
ggplot(data, aes(x = Agent, y = ratio_filled_pause, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    width = 0.25,
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x)
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "ratio_filled_pause")

# creating merged data - ling
# Conversant and participant rows are extracted separately, then joined on
# locutor / Trial / Agent so each row holds data_conv and data_part
temp1 <- setNames(
  subset(data,
         subset = tier == "conversant",
         select = c("locutor", "conv_id_unif", "Agent", "ratio_filled_pause")),
  c("locutor", "Trial", "Agent", "data_conv")
)
temp2 <- setNames(
  subset(data,
         subset = tier == "participant",
         select = c("locutor", "conv_id_unif", "Agent", "ratio_filled_pause")),
  c("locutor", "Trial", "Agent", "data_part")
)
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Fixed effects: data_conv, Agent, their interaction and Trial;
# random intercept and Trial slope per locutor
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: -2211.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7298 -0.6161 -0.1710  0.5228  4.0181 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.  Corr 
##  locutor  (Intercept) 5.145e-04 2.268e-02      
##           Trial       3.793e-09 6.159e-05 -1.00
##  Residual             5.974e-04 2.444e-02      
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)       3.042e-02  6.310e-03   4.821
## data_conv         3.437e-02  4.658e-02   0.738
## AgentR            9.434e-03  3.766e-03   2.505
## Trial            -8.149e-05  3.191e-04  -0.255
## data_conv:AgentR -6.087e-02  1.352e-01  -0.450
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.499                     
## AgentR      -0.502  0.796              
## Trial       -0.353  0.090  0.092       
## dt_cnv:AgnR  0.209 -0.359 -0.453 -0.140
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Model summary table, titled with the feature name
tab_model(mdl, title = paste("part ~ conv ", "ratio_filled_pause", sep = " "))
part ~ conv ratio_filled_pause
  data_part
Predictors Estimates CI p
(Intercept) 0.03 0.02 – 0.04 <0.001
data_conv 0.03 -0.06 – 0.13 0.461
Agent [R] 0.01 0.00 – 0.02 0.012
Trial -0.00 -0.00 – 0.00 0.798
data_conv * Agent [R] -0.06 -0.33 – 0.20 0.653
Random Effects
σ2 0.00
τ00 locutor 0.00
τ11 locutor.Trial 0.00
ρ01 locutor -1.00
ICC 0.46
N locutor 21
Observations 504
Marginal R2 / Conditional R2 0.012 / 0.462
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the
# feature name so its rows stay identifiable once appended to df_overall
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "ratio_filled_pause"
# Confidence intervals are requested for the fixed effects only
# (parm = "beta_") and aligned with `s` by coefficient name; this replaces
# the positional rows 5:9, which are only right for this exact model layout
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
l <- l[rownames(s), , drop = FALSE]
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and sd of the participant and conversant values: overall, and
# separately for Agent == "R" and Agent == "H"
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  target_row <- paste0("ratio_filled_pause_", pc)
  value_col <- paste0("data_", pc)
  df2[target_row, "mean"] <- mean(merres[[value_col]])
  df2[target_row, "std"] <- sd(merres[[value_col]])
  df2[target_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[target_row, "std_r"] <- sd(data_r[[value_col]])
  df2[target_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[target_row, "std_h"] <- sd(data_h[[value_col]])
}
# Setting up the building blocks
# Base layer: data_conv on x, data_part on y, coloured by Agent
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Colored scatterplot and regression lines
basic_plot +
  geom_point(alpha = .3, size = .9) +
  geom_smooth(method = "lm") +
  labs(x = "VI: ratio_filled_pause Conv", y = "VD: ratio_filled_pause Part", color = "Agent")

# second plot
# Scatter with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds per-Agent marginal densities
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(x = "VI: ratio_filled_pause Conv", y = "VD: ratio_filled_pause Part", color = "Agent") +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)
## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive

## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive

mean_ipu_lgth

# Density-scaled histogram with overlaid density curve of mean_ipu_lgth,
# coloured by Agent, one row of panels per tier
ggplot(data, aes(x = mean_ipu_lgth, color = Agent)) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of mean_ipu_lgth per trial, coloured by Agent, split by tier
ggplot(data, aes(x = Trial2, y = mean_ipu_lgth, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Bars show the mean per Agent; error bars span mean - sd to mean + sd
ggplot(data, aes(x = Agent, y = mean_ipu_lgth, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    width = 0.25,
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x)
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "mean_ipu_lgth")

# creating merged data - ling
# Conversant and participant rows are extracted separately, then joined on
# locutor / Trial / Agent so each row holds data_conv and data_part
temp1 <- setNames(
  subset(data,
         subset = tier == "conversant",
         select = c("locutor", "conv_id_unif", "Agent", "mean_ipu_lgth")),
  c("locutor", "Trial", "Agent", "data_conv")
)
temp2 <- setNames(
  subset(data,
         subset = tier == "participant",
         select = c("locutor", "conv_id_unif", "Agent", "mean_ipu_lgth")),
  c("locutor", "Trial", "Agent", "data_part")
)
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Fixed effects: data_conv, Agent, their interaction and Trial;
# random intercept and Trial slope per locutor
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: 391.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3019 -0.6577 -0.0919  0.5407  5.2737 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  locutor  (Intercept) 4.054e-02 0.201348     
##           Trial       3.046e-05 0.005519 1.00
##  Residual             1.099e-01 0.331460     
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)       1.519381   0.094101  16.146
## data_conv        -0.073846   0.047409  -1.558
## AgentR           -0.318601   0.124662  -2.556
## Trial             0.031947   0.004471   7.146
## data_conv:AgentR  0.070538   0.091629   0.770
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.819                     
## AgentR      -0.572  0.646              
## Trial       -0.028 -0.105 -0.042       
## dt_cnv:AgnR  0.438 -0.526 -0.957  0.025
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
# Model summary table, titled with the feature name
tab_model(mdl, title = paste("part ~ conv ", "mean_ipu_lgth", sep = " "))
part ~ conv mean_ipu_lgth
  data_part
Predictors Estimates CI p
(Intercept) 1.52 1.33 – 1.70 <0.001
data_conv -0.07 -0.17 – 0.02 0.119
Agent [R] -0.32 -0.56 – -0.07 0.011
Trial 0.03 0.02 – 0.04 <0.001
data_conv * Agent [R] 0.07 -0.11 – 0.25 0.441
Random Effects
σ2 0.11
τ00 locutor 0.04
τ11 locutor.Trial 0.00
ρ01 locutor 1.00
ICC 0.33
N locutor 21
Observations 504
Marginal R2 / Conditional R2 0.119 / 0.409
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the
# feature name so its rows stay identifiable once appended to df_overall
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "mean_ipu_lgth"
# Confidence intervals are requested for the fixed effects only
# (parm = "beta_") and aligned with `s` by coefficient name; this replaces
# the positional rows 5:9, which are only right for this exact model layout
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
l <- l[rownames(s), , drop = FALSE]
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and sd of the participant and conversant values: overall, and
# separately for Agent == "R" and Agent == "H"
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  target_row <- paste0("mean_ipu_lgth_", pc)
  value_col <- paste0("data_", pc)
  df2[target_row, "mean"] <- mean(merres[[value_col]])
  df2[target_row, "std"] <- sd(merres[[value_col]])
  df2[target_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[target_row, "std_r"] <- sd(data_r[[value_col]])
  df2[target_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[target_row, "std_h"] <- sd(data_h[[value_col]])
}
# Setting up the building blocks
# Base layer: data_conv on x, data_part on y, coloured by Agent
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Colored scatterplot and regression lines
basic_plot +
  geom_point(alpha = .3, size = .9) +
  geom_smooth(method = "lm") +
  labs(x = "VI: mean_ipu_lgth Conv", y = "VD: mean_ipu_lgth Part", color = "Agent")

# second plot
# Scatter with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds per-Agent marginal densities
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(x = "VI: mean_ipu_lgth Conv", y = "VD: mean_ipu_lgth Part", color = "Agent") +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

speech_rate_min4

# Density-scaled histogram with overlaid density curve of speech_rate_min4,
# coloured by Agent, one row of panels per tier
ggplot(data, aes(x = speech_rate_min4, color = Agent)) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2) +
  facet_grid(tier ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of speech_rate_min4 per trial, coloured by Agent, split by tier
ggplot(data, aes(x = Trial2, y = speech_rate_min4, color = Agent)) +
  geom_boxplot() +
  facet_grid(tier ~ .)

# Bars show the mean per Agent; error bars span mean - sd to mean + sd
ggplot(data, aes(x = Agent, y = speech_rate_min4, fill = Agent)) +
  stat_summary(geom = "bar", fun.y = mean) +
  stat_summary(
    geom = "errorbar",
    width = 0.25,
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x)
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "speech_rate_min4")

# creating merged data - ling
# Conversant and participant rows are extracted separately, then joined on
# locutor / Trial / Agent so each row holds data_conv and data_part
temp1 <- setNames(
  subset(data,
         subset = tier == "conversant",
         select = c("locutor", "conv_id_unif", "Agent", "speech_rate_min4")),
  c("locutor", "Trial", "Agent", "data_conv")
)
temp2 <- setNames(
  subset(data,
         subset = tier == "participant",
         select = c("locutor", "conv_id_unif", "Agent", "speech_rate_min4")),
  c("locutor", "Trial", "Agent", "data_part")
)
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# Fixed effects: data_conv, Agent, their interaction and Trial;
# random intercept and Trial slope per locutor
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00639831 (tol = 0.002, component 1)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
##    Data: merres
## 
## REML criterion at convergence: 819
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.72952 -0.63845  0.00567  0.60109  2.93797 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  locutor  (Intercept) 0.400256 0.63266       
##           Trial       0.001588 0.03985  -0.47
##  Residual             0.238110 0.48797       
## Number of obs: 504, groups:  locutor, 21
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)       4.97586    0.34129  14.579
## data_conv         0.06038    0.05717   1.056
## AgentR            0.84174    0.47147   1.785
## Trial            -0.01083    0.01077  -1.006
## data_conv:AgentR -0.20130    0.09157  -2.198
## 
## Correlation of Fixed Effects:
##             (Intr) dt_cnv AgentR Trial 
## data_conv   -0.904                     
## AgentR      -0.604  0.664              
## Trial       -0.144 -0.075 -0.061       
## dt_cnv:AgnR  0.562 -0.624 -0.994  0.059
## convergence code: 0
## Model failed to converge with max|grad| = 0.00639831 (tol = 0.002, component 1)
# Model summary table, titled with the feature name
tab_model(mdl, title = paste("part ~ conv ", "speech_rate_min4", sep = " "))
part ~ conv speech_rate_min4
  data_part
Predictors Estimates CI p
(Intercept) 4.98 4.31 – 5.64 <0.001
data_conv 0.06 -0.05 – 0.17 0.291
Agent [R] 0.84 -0.08 – 1.77 0.074
Trial -0.01 -0.03 – 0.01 0.315
data_conv * Agent [R] -0.20 -0.38 – -0.02 0.028
Random Effects
σ2 0.24
τ00 locutor 0.40
τ11 locutor.Trial 0.00
ρ01 locutor -0.47
ICC 0.59
N locutor 21
Observations 504
Marginal R2 / Conditional R2 0.020 / 0.595
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the
# feature name so its rows stay identifiable once appended to df_overall
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "speech_rate_min4"
# Confidence intervals are requested for the fixed effects only
# (parm = "beta_") and aligned with `s` by coefficient name; this replaces
# the positional rows 5:9, which are only right for this exact model layout
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
l <- l[rownames(s), , drop = FALSE]
df_overall <- rbind(df_overall, cbind(s, l))

# saving other features
# Mean and sd of the participant and conversant values: overall, and
# separately for Agent == "R" and Agent == "H"
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  target_row <- paste0("speech_rate_min4_", pc)
  value_col <- paste0("data_", pc)
  df2[target_row, "mean"] <- mean(merres[[value_col]])
  df2[target_row, "std"] <- sd(merres[[value_col]])
  df2[target_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[target_row, "std_r"] <- sd(data_r[[value_col]])
  df2[target_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[target_row, "std_h"] <- sd(data_h[[value_col]])
}
# Setting up the building blocks
# Base layer: data_conv on x, data_part on y, coloured by Agent
basic_plot <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  theme_bw()

# Colored scatterplot and regression lines
basic_plot +
  geom_point(alpha = .3, size = .9) +
  geom_smooth(method = "lm") +
  labs(x = "VI: speech_rate_min4 Conv", y = "VD: speech_rate_min4 Part", color = "Agent")

# second plot
# Scatter with 2D density contours; both axes run from 0 to the observed
# maximum, and ggMarginal adds per-Agent marginal densities
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(x = "VI: speech_rate_min4 Conv", y = "VD: speech_rate_min4 Part", color = "Agent") +
  theme(legend.position = "bottom")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

Saver

# Output goes to summary.xlsx in the directory of the script open in the
# RStudio editor when RStudio is available, otherwise in the working directory
file_path <- file.path(
  if (rstudioapi::isAvailable()) {
    dirname(rstudioapi::getSourceEditorContext()$path)
  } else {
    getwd()
  },
  "summary.xlsx"
)
# Write the first data set in a new workbook
# append = FALSE starts from a fresh file; with append = TRUE a re-run would
# reopen the previous summary.xlsx and try to add a "models" sheet that is
# already there
write.xlsx(df_overall, file = file_path,
           sheetName = "models", append = FALSE)
# Write others sheets

write.xlsx(df2, file = file_path,
           sheetName = "hr_comparison", append = TRUE)